Setup

The settings list settings configure which parts, simulations, and analysis steps of this R markdown are executed. This way single components of the analysis can be disabled for the purpose of saving computation time. Setting settings$all to TRUE will set all other options to true regardless of their value.

settings <- list(
  # If true sets all other settings to TRUE
  all=F,
  # Generate interactive plots of co-expression structures
  investigate.coexpression=T,
  # Perform Differential Expression Analysis
  perform.DEA=T,
  # Generate paper figures
  generate.paper.figs=T,
  # Specify if paper figures are exported
  export.figures = F,
  # Path for file exports
  paper.fig.path = "../danawriteup/figs/",
  # Clears environment
  debug=TRUE
)
if(settings$all) {
  settings$investigate.coexpression=TRUE
  settings$perform.DEA=TRUE
  settings$generate.paper.figs=TRUE
}
if(settings$debug) rm(list=ls()[ls()!="settings"])

Parameter configuration

The config list contains all parameters for the analysis. Remember to always set the working directory and the paths to the data/external files prior to running the code.

config <- list(
  DANA.path = "./R/DANA.R",
  
  ## Data
  case.name = "TCGA",
  # Read count data file for the full data set
  data.file.path = "data/TCGA_harmonized_BRCA_UCS.RData",
  # RData file for coordinates data frame based on miRBase miRNA definitions
  coords.file.path = "data/miRBase.coords.RData",
  
  ## Normalization Methods
  # Specify which normalization methods will be applied
  norm.apply.TC = TRUE,
  norm.apply.UQ = TRUE,
  norm.apply.med = TRUE,
  norm.apply.TMM = TRUE,
  norm.apply.DESeq = TRUE,
  norm.apply.PoissonSeq = TRUE,
  norm.apply.QN = TRUE,
  norm.apply.RUV = TRUE,
  
  # thresholds for zero-expressed, poorly-expressed and well-expressed genes 
  t.zero = 2,  # zero-expressed in [0, t.zero)
  t.poor = 5,  # poorly-expressed in [t.zero, t.well]
  t.well = 100,  # well-expressed in [t.well, inf)
  # distance threshold for clustering
  cluster.threshold = 10000,
  # preprocessing data transformation type: none ("n"), modified log2 ("log2"),
  # or cube root ("cube") available
  preprocess.transform = "log2",
  
  ## Correlation Estimation for positive and negative controls
  # Available methods | Tuning parameter calibration schemes
  # "mb"              | "cv", "aic", "bic", "av"
  # "huge.mb"         | "ric", "stars"
  # "glasso"          | "ric", "stars", "ebic"
  # "fastggm"         | none
  # "pearson"         | none
  # "spearman"        | none
  
  # Positive Controls
  corr.method.pos = "mb",
  tuning.criterion.pos = "bic",
  # Negative Controls
  corr.method.neg = "pearson",
  tuning.criterion.neg = "",
  # Generate plots within DANA
  generate.plots = FALSE
)

Configuration for the differential expression analysis

config.DEA <- list(
  ## Data
  case.name = "TCGA",

  ## Normalization Methods
  # Specify which normalization methods will be applied
  norm.apply.TC = TRUE,
  norm.apply.UQ = TRUE,
  norm.apply.med = TRUE,
  norm.apply.TMM = TRUE,
  norm.apply.DESeq = TRUE,
  norm.apply.PoissonSeq = TRUE,
  norm.apply.QN = TRUE,
  norm.apply.RUV = FALSE,
  
  # Significance level for DEA
  alpha = 0.01,
  # Plots
  generate.volcano.plot = TRUE,
  generate.meanvar.plot = TRUE,
  # Use q-values (adjusted p-values) instead of p-values
  q.values = FALSE,
  # RUV reduces the parameter size. Reduce DEA result to RUV genes?
  perform.subsetting = FALSE
)

Load Packages

Load all required R libraries/packages.

# DANA functions
source(config$DANA.path)
## Warning: package 'ggplot2' was built under R version 4.0.3
## Warning: package 'gridExtra' was built under R version 4.0.3
## Warning: package 'ggnewscale' was built under R version 4.0.3
## Warning: package 'corrplot' was built under R version 4.0.3
## Warning: package 'stargazer' was built under R version 4.0.3
## Warning: package 'plotly' was built under R version 4.0.3
## Warning: package 'ggrepel' was built under R version 4.0.3
## Warning: package 'glmnet' was built under R version 4.0.3
## Warning: package 'huge' was built under R version 4.0.3
# Libraries
library(openxlsx)  # read excel files
library(corrplot) # For mixed correlation plots
library(cowplot) # Arrange plots
## Warning: package 'cowplot' was built under R version 4.0.3
library(RColorBrewer) # Colors
library(latex2exp) # for latex in axis labels 
library(ggcorrplot) # ggplot2 correlation plots
## Warning: package 'ggcorrplot' was built under R version 4.0.3

Load Data

Load the dataset under study

We load the data set into memory.

# Load the p x n matrix of read counts into the workspace
load(config$data.file.path)

# Unlist the data
data.BRCA <- TCGA.BRCA.UCS$BRCA
data.UCS <- TCGA.BRCA.UCS$UCS

# Transform gene names to lower case
rownames(data.BRCA)  <- tolower(rownames(data.BRCA))
rownames(data.UCS) <- tolower(rownames(data.UCS))

# Build single RC matrix
data.RC <- data.TCGA <- cbind(data.BRCA, data.UCS) 
data.groups <- c(rep("BRCA", dim(data.BRCA)[2]), rep("UCS", dim(data.UCS)[2]))

# Inspect
cat("Dimensions of the data: ", dim(data.RC), "\n")
## Dimensions of the data:  1881 223

miRNA Coordinates

Load corresponding miRNA chromosome coordinates

A coordinates data frame that specifies the base pair location of each miRNA of the data set on the chromosomes is loaded.

# Load miRNA chromosome location coordinates data frame
load(config$coords.file.path)
coords <- coords  # change according to the name of the loaded data matrix

Remove genes that cannot be found in the coordinates data frame

Some miRNAs map to multiple locations of sequence families. We named the sequence family with a parenthesis that reflects the number of members from all the different genomic locations (e.g. hsa-let-7a(3)). These miRNAs cannot be uniquely mapped to the genome, therefore we must exclude these from the location based analysis.

# only consider genes that are present in the coordinate data frame
genes.in.coords <- rownames(coords)[na.omit(match(rownames(data.RC), rownames(coords)))]
cat(dim(data.RC)[1]-length(genes.in.coords), " genes not found in coords. Reducing data set to ", length(genes.in.coords), "genes.\n")
## 33  genes not found in coords. Reducing data set to  1848 genes.
# test data set
data.RC <- data.RC[genes.in.coords, ]
# benchmark data set
genes <- rownames(data.RC)
rm(genes.in.coords)

Examine the Data

First, we investigate the distribution of mean miRNA expression of the data.

# Histogram plot test data set
par(mfrow=c(1,1))
df <- data.frame(log.expression=log2(rowMeans(data.RC)+1))
print(ggplot(df, aes(x=log.expression)) + 
        geom_histogram(binwidth = .1, color="black", fill="black") +
        geom_vline(xintercept = log2(config$t.zero+1), color="blue", linetype="dashed") +
        geom_vline(xintercept = log2(config$t.poor+1), color="blue", linetype="dashed") +
        geom_vline(xintercept = log2(config$t.well+1), color="red",  linetype="dashed") +
        ggtitle("Test data set"))

rm(df)

We observe that there is a large proportion of poorly-expressed genes. Some of them have extremely low or zero mean expression which corresponds to essentially zero reads.

DANA - Performance Analysis of Normalization Methods

We apply the full analysis pipeline to the data set. This includes:

  1. Apply the normalization methods to the raw counts.
  2. Define positive and negative controls based on read counts.
  3. Estimate the level of coexpression between miRNAs for each data set.
  4. Compare positive and negative controls before and after normalization.

The following parameters are used:

  • Case name: TCGA
  • Control definition:
    • Definition of negative controls, read count in [2, 5]
    • Definition of positive controls, read count in [100, inf]
    • Distance threshold for clustering: 10^{4}
  • Data preprocessing transformation: log2
  • Coexpression estimation:
    • Correlation estimation method for positive controls: mb
    • Correlation estimation tuning parameter calibration method for positive controls: bic
  • Comparison statistics:
    • Generate plots? FALSE
genes <- rownames(data.RC)

## Apply Normalization
data.norm <- normalize(data.RC,
                       groups=data.groups,
                       name=config$case.name,
                       apply.TC=config$norm.apply.TC,
                       apply.UQ=config$norm.apply.UQ,
                       apply.med=config$norm.apply.med,
                       apply.TMM=config$norm.apply.TMM,
                       apply.DESeq=config$norm.apply.DESeq,
                       apply.PoissonSeq=config$norm.apply.PoissonSeq,
                       apply.QN=config$norm.apply.QN,
                       apply.RUV=config$norm.apply.RUV)
## 1096 genes has been filtered because they contains too small number of reads across the experiments.
## Define Controls
pre.res <- define.controls(data.RC,
                           t.zero=config$t.zero,
                           t.poor=config$t.poor,
                           t.well=config$t.well,
                           t.cluster=config$cluster.threshold,
                           coords=coords,
                           clusters=NULL)
## Number of positive control markers with RC in [100, inf): 116
## Number of negative control markers with RC in [2, 5]: 91
pos.controls <- pre.res$genes.pos
neg.controls <- pre.res$genes.neg
clusters <- pre.res$clusters
clusters.data <- clusters


# Apply DANA pipeline
DANA.results <- DANA(data.RC=data.RC, 
                     data.norm=data.norm, 
                     pos.controls=pos.controls, 
                     neg.controls=neg.controls, 
                     clusters=clusters.data,
                     coords=coords,
                     case.name="benchmark",
                     generate.plots=config$generate.plots,
                     preprocess.transform=config$preprocess.transform,
                     corr.method.pos=config$corr.method.pos,
                     tuning.criterion.pos=config$tuning.criterion.pos,
                     corr.method.neg=config$corr.method.neg,
                     tuning.criterion.neg=config$tuning.criterion.neg) 
## Estimating correlations for pos. and neg. controls for data set benchmark...done
## Estimating correlations for pos. and neg. controls for data set TCGA.TMM...done
## Estimating correlations for pos. and neg. controls for data set TCGA.DESeq...done
## Estimating correlations for pos. and neg. controls for data set TCGA.PoissonSeq...done
## Estimating correlations for pos. and neg. controls for data set TCGA.TC...done
## Estimating correlations for pos. and neg. controls for data set TCGA.Med...done
## Estimating correlations for pos. and neg. controls for data set TCGA.RUVg...done
## Estimating correlations for pos. and neg. controls for data set TCGA.RUVs...done
## Estimating correlations for pos. and neg. controls for data set TCGA.RUVr...done
## Estimating correlations for pos. and neg. controls for data set TCGA.UQ...done
## Estimating correlations for pos. and neg. controls for data set TCGA.QN...done
## Comparing models benchmark vs. TCGA.TMM....done
## Comparing models benchmark vs. TCGA.DESeq....done
## Comparing models benchmark vs. TCGA.PoissonSeq....done
## Comparing models benchmark vs. TCGA.TC....done
## Comparing models benchmark vs. TCGA.Med....done
## Comparing models benchmark vs. TCGA.RUVg....done
## Comparing models benchmark vs. TCGA.RUVs....done
## Comparing models benchmark vs. TCGA.RUVr....done
## Comparing models benchmark vs. TCGA.UQ....done
## Comparing models benchmark vs. TCGA.QN....done
if(!config$generate.plots) {
  stargazer(DANA.results$metrics, type="text", summary=FALSE, digits=4, 
            title="Comparison statistics for the TCGA-BRCA/UCS data set", align=TRUE)
}
## 
## Comparison statistics for the TCGA-BRCA/UCS data set
## =============================
##                   cc    mcr  
## -----------------------------
##    TCGA.TMM     0.9904 0.7154
##   TCGA.DESeq    0.9896 0.7332
## TCGA.PoissonSeq 0.9882 0.7166
##     TCGA.TC     0.9927 0.5344
##    TCGA.Med     0.9245 0.3628
##    TCGA.RUVg    0.9854 0.7167
##    TCGA.RUVs    0.8288 0.8121
##    TCGA.RUVr    0.9763 0.7767
##     TCGA.UQ     0.9374 0.3981
##     TCGA.QN     0.9717 0.7834
## -----------------------------
iplot.data.noNorm <- iggplot.corr(DANA.results$data.model$pos$corr, clusters=clusters, title="Positive controls (un-normalized)", coords=coords)
iplot.data.TMM <- iggplot.corr(DANA.results$norm.models$TCGA.TMM$pos$corr, clusters=clusters, title="Positive controls (TMM)", coords=coords)

Investigate Co-expression Structures

We compare the estimated co-expression structures for the test and benchmark dataset. Here, we take a look at the models for the un-normalized data as well as TMM-normalized data. Note that you cannot directly compare the graphs of benchmark with test data since the considered set of genes is not identical.

if(settings$investigate.coexpression) {
  htmltools::tagList(list(iplot.data.noNorm, iplot.data.TMM))  # show plots in markdown
}

Differential Expression Analysis

Normalize Data

if(settings$perform.DEA) {
  ## Reset data
  data.RC <- data.TCGA 
  
  ## Normalize test Data
  data.norm <- normalize(data.RC,
                         groups=data.groups,
                         name="TCGA",
                         apply.TC=config.DEA$norm.apply.TC,
                         apply.UQ=config.DEA$norm.apply.UQ,
                         apply.med=config.DEA$norm.apply.med,
                         apply.TMM=config.DEA$norm.apply.TMM,
                         apply.DESeq=config.DEA$norm.apply.DESeq,
                         apply.PoissonSeq=config.DEA$norm.apply.PoissonSeq,
                         apply.QN=config.DEA$norm.apply.QN,
                         apply.RUV=FALSE)
  # RUV normalization
  if(config.DEA$norm.apply.RUV) {
    data.norm.RUV.adjust <- list(test.RUVr=norm.RUV(data.RC, data.groups, "RUVr")$adjust.factor,
                                 test.RUVs=norm.RUV(data.RC, data.groups, "RUVs")$adjust.factor,
                                 test.RUVg=norm.RUV(data.RC, data.groups, "RUVg")$adjust.factor)
  }

}
## 1124 genes has been filtered because they contains too small number of reads across the experiments.

Test DEA

if(settings$perform.DEA) {
  ## Perform DEA on full dataset
  test.DEA <- DE.voom(data.RC=data.RC, groups=data.groups, plot=config.DEA$generate.meanvar.plot, plot.title="Un-normalized")
  if(config.DEA$generate.volcano.plot) plot.DE.volcano(test.DEA, alpha=config.DEA$alpha, q.values=config.DEA$q.values, title="Un-normalized")
  
  ## DEA for normalized test data
  data.norm.DEA <- list()
  for(i in 1:length(data.norm)) {
    data.norm.DEA <- 
      append(data.norm.DEA, list(DE.voom(data.RC=data.norm[[i]], 
                                         groups=data.groups, 
                                         plot=config.DEA$generate.meanvar.plot,
                                         plot.title=names(data.norm)[i])))
    if(config.DEA$generate.volcano.plot) {
      plot.DE.volcano(data.norm.DEA[[i]], 
                      alpha=config.DEA$alpha, 
                      q.values=config.DEA$q.values, 
                      title=names(data.norm)[i])
    }
  }
  ## DEA for RUV normalization
  if(config.DEA$norm.apply.RUV) {
    for(i in 1:length(data.norm.RUV.adjust)) {
      data.norm.DEA <- 
        append(data.norm.DEA, list(DE.voom(data.RC=data.RC, 
                                           groups=data.groups, 
                                           plot=config.DEA$generate.meanvar.plot,
                                           plot.title=names(data.norm.RUV.adjust)[i],
                                           adjust=data.norm.RUV.adjust[[i]])))
      if(config.DEA$generate.volcano.plot) {
        plot.DE.volcano(data.norm.DEA[[i]], 
                        alpha=config.DEA$alpha, 
                        q.values=config.DEA$q.values, 
                        title=names(data.norm.RUV.adjust)[i])
      }
    }
    names(data.norm.DEA) <- c(names(data.norm), names(data.norm.RUV.adjust))
  } else {
    names(data.norm.DEA) <- names(data.norm)
  }
}

## number of DE genes: 1515

## number of DE genes: 1552

## number of DE genes: 1586

## number of DE genes: 1243

## number of DE genes: 597

## number of DE genes: 1198

## number of DE genes: 1134

## number of DE genes: 554

Results and Figures for Paper

Number of samples, markers, and positive and negative controls

if(settings$generate.paper.figs) {
  
# Dimension of data after analysis
cat("Data:\n")
cat("  - Dimensions: p=", dim(data.RC)[1],", n=", dim(data.RC)[2], "\n", sep="")
cat("  - Positive controls:\n")
cat("    * Definition mean RC in interval: [", config$t.well, ", inf ]\n", sep="")
cat("    * Number of positive controls miRNAs:", length(pos.controls), "\n")
cat("  - Negative controls:\n")
cat("    * Definition mean RC in interval: [", config$t.zero, ",", config$t.poor, "]\n")
cat("    * Number of negative controls miRNAs:", length(neg.controls), "\n")

}
## Data:
##   - Dimensions: p=1881, n=223
##   - Positive controls:
##     * Definition mean RC in interval: [100, inf ]
##     * Number of positive controls miRNAs: 116 
##   - Negative controls:
##     * Definition mean RC in interval: [ 2 , 5 ]
##     * Number of negative controls miRNAs: 91

Read Count Distribution in the Data

Per Data-Set

if(settings$generate.paper.figs) {
  par(mfrow=c(1,1))
  
  # Histogram plot test data set
  data.RC.log2 <- log2(rowMeans(data.RC)+1)
  df <- data.frame(log.expression=data.RC.log2)
  p.data.RC.hist <- ggplot(df, aes(x=log.expression)) + 
    geom_histogram(binwidth = .1, color="black", fill="black") +
    geom_vline(xintercept = log2(config$t.zero+1), color="blue", linetype="dashed") +
    geom_vline(xintercept = log2(config$t.poor+1), color="blue", linetype="dashed") +
    geom_vline(xintercept = log2(config$t.well+1), color="red",  linetype="dashed") +
    ggtitle("TCGA-BRCA/UCS data set") +
    theme_classic()
  print(p.data.RC.hist)

  
  p.RC.hist <- ggplot(df) + 
    theme_classic() +
    geom_histogram(aes(x=log.expression, y=..count..), binwidth = .1) +
    geom_vline(xintercept = log2(config$t.zero+1), color="#5851b8", linetype="twodash") +
    geom_vline(xintercept = log2(config$t.poor+1), color="#5851b8", linetype="twodash") +
    geom_vline(xintercept = log2(config$t.well+1), color="#E7298A",  linetype="longdash") +
    geom_segment(aes(x = log2(config$t.zero+1), y = 600, xend = log2(config$t.poor+1), yend = 600),
                 arrow=arrow(length=unit(.07, "in"), ends="both"),
                 color="#5851b8") +
    annotate(geom="label", x=(log2(config$t.zero+1)+log2(config$t.poor+1))/2.0, y=630,
             label="neg. contr.", color="#5851b8", size=4, fill = alpha(c("white"),0.85), label.size = NA) +
    geom_segment(aes(x = log2(config$t.well+1), y = 600, xend = log2(config$t.well+1)+4, yend = 600),
                 arrow=arrow(length=unit(.07, "in"), ends="last"),
                 color="#E7298A") +
    annotate(geom="label", x=log2(config$t.well+1)+.5, y=630,
             label="pos. contr.", color="#E7298A", size=4, hjust=0, fill = alpha(c("white"),0.85), label.size = NA) +
    xlab("Mean log2(read count)") +
    ylab("Frequency")
  print(p.RC.hist)
  
    ## Export Plots
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "BRCA_UCS_RC_Distribution.pdf"), p.RC.hist, width=5, height=4, device="pdf")
  }
  
}

Mean-Variance Plot

Distribution property among technical replicates

if(settings$generate.paper.figs) {
  # Function for mean-variance plot for a given data set
  mean.variance.plot <- function(data.RC, title, axis.limit=NULL) {
    df <- data.frame(data.mean=log10(rowMeans(data.RC) + 1),
                     data.var =log10(rowVars(data.RC) + 1))
    lin.fit <- lm(df$data.var ~ df$data.mean)$coefficients
    p <- ggplot(df,aes(x=data.mean,y=data.var)) + 
      geom_point(alpha=.25) + 
      xlab("log10(Mean)") + 
      ylab("log10(Variance)") + 
      geom_abline(intercept = lin.fit[1], slope = lin.fit[2], color="red", linetype="longdash", size=1) +
      geom_abline(intercept = 0, slope = 1, colour="blue") +
      annotate("text", alpha=1, x=4, y=0.5, hjust=0, vjust=0, size=3, colour="red",
               label=paste0("log10(Variance) = ", round(lin.fit[1],2), " + ", round(lin.fit[2],2), "*log10(Mean)")) + 
      xlim(0, ifelse(is.null(axis.limit), max(df), axis.limit)) +
      ylim(0, ifelse(is.null(axis.limit), max(df), axis.limit)) +
      ggtitle(title) +
      theme(aspect.ratio=1, plot.title = element_text(hjust = 0.5)) +
      theme_bw()
    return(p)
  }
  
  # Both subtypes
  p.meanvar <- mean.variance.plot(data.RC, "Full data set", axis.limit=12.5)
  print(p.meanvar)
  # BRCA
  p.meanvar.BRCA <- mean.variance.plot(data.RC[, data.groups=="BRCA"], "BRCA", axis.limit=12.5)
  print(p.meanvar.BRCA)
  # UCS
  p.meanvar.UCS <- mean.variance.plot(data.RC[, data.groups=="UCS"], "UCS", axis.limit=12.5)
  print(p.meanvar.UCS)
}

Marker-specific sd vs mean

if(settings$generate.paper.figs) {
  
  # Both subtypes
  p.meansd <- plot.mean.sd(data.RC, config$t.zero, config$t.poor, config$t.well, "Full data set")
  print(p.meansd)
  # BRCA
  p.meansd.BRCA <- plot.mean.sd(data.RC[, data.groups=="BRCA"], config$t.zero, config$t.poor, config$t.well, "BRCA")
  print(p.meansd.BRCA)
  # UCS
  p.meansd.UCS <- plot.mean.sd(data.RC[, data.groups=="UCS"], config$t.zero, config$t.poor, config$t.well, "UCS")
  print(p.meansd.UCS)
  
  
  ## Export Plots
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "BRCA_UCS_MeanSD.pdf"), p.meansd + labs(title = NULL), width = 5, height=4, device="pdf")
  }
}

Correlation Plots

if(settings$generate.paper.figs) {
  num.genes.plot.pos <- 60
  num.genes.plot.neg <- 60

  # Co-expression models
  data.model <- DANA.results$data.model
  data.norm.model <- DANA.results$norm.models$TCGA.DESeq
  
  # Common control miRNAs
  pos.controls <- rownames(data.model$pos$corr)
  pos.controls.norm <- rownames(data.norm.model$pos$corr)
  pos.controls.common <- intersect(pos.controls, pos.controls.norm)
  neg.controls <- rownames(data.model$neg$corr)
  neg.controls.norm <- rownames(data.norm.model$neg$corr)
  # reduce the number of genes for the plot
  num.genes.plot.pos <- min(num.genes.plot.pos, length(pos.controls.common))
  pos.controls.plot <- pos.controls.common[1:num.genes.plot.pos]
  
  num.genes.plot.neg <- min(num.genes.plot.neg,
                            dim(data.model$neg$corr)[1],
                            dim(data.norm.model$neg$corr)[1])
  
  # Subsetted correlation matrices
  corr.data.pos <- data.model$pos$corr[pos.controls.plot, pos.controls.plot]
  corr.data.DESeq.pos <- data.norm.model$pos$corr[pos.controls.plot, pos.controls.plot]
  corr.data.neg <- data.model$neg$corr[1:num.genes.plot.neg, 1:num.genes.plot.neg]
  corr.data.DESeq.neg <- data.norm.model$neg$corr[1:num.genes.plot.neg, 1:num.genes.plot.neg]
  
  
  ## Generate plots
  # Positive controls
  p.corr.pos <- ggplot.corr(corr.data.pos, 
                                 clusters=clusters, 
                                 threshold=0, 
                                 title="Un-normalized",
                                 coords=coords)
  p.corr.pos.DESeq <- ggplot.corr(corr.data.DESeq.pos, 
                                     clusters=clusters, 
                                     threshold=0, 
                                     title="DESeq",
                                     coords=coords)
  
  
  p.corr.pos.two <- arrangeGrob(p.corr.pos.DESeq + theme(legend.position="none"),
                                p.corr.pos + theme(legend.position="none"),
                                get.legend(p.corr.pos.DESeq + theme(legend.position = "bottom")),
                                layout_matrix=rbind(c(1,2), c(4,4)),
                                heights = c(5,1))
  grid.arrange(p.corr.pos.two)  # plot
  
  # Negative controls
  p.corr.neg <- ggplot.corr(corr.data.neg, 
                            clusters=clusters, 
                            threshold=0, 
                            title="Un-normalized")
  p.corr.neg.DESeq <- ggplot.corr(corr.data.DESeq.neg, 
                                  clusters=clusters, 
                                  threshold=0, 
                                  title="DESeq")
  
  p.corr.neg.two <- arrangeGrob(p.corr.neg.DESeq + theme(legend.position="none"),
                                p.corr.neg + theme(legend.position="none"),
                                get.legend(p.corr.neg.DESeq + theme(legend.position = "bottom")),
                                layout_matrix=rbind(c(1,2), c(4,4)),
                                heights = c(5,1))
  grid.arrange(p.corr.neg.two)  # plot
  
  
  ## Export Plots
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "BRCA_UCS_CorrPos_DESeq.pdf"), p.corr.pos.two, width = 5, height=3.5, device="pdf")
    ggsave(paste0(settings$paper.fig.path, "BRCA_UCS_CorrNeg_DESeq.pdf"), p.corr.neg.two, width = 5, height=3.5, device="pdf")
  }
}

All normalization methods

if(settings$generate.paper.figs) { 
  p.corr.noNorm <- ggplot.corr(DANA.results$data.model$pos$corr,
                             clusters=clusters,
                             title="un-normalized")
  p.corr.TMM <- ggplot.corr(DANA.results$norm.models$TCGA.TMM$pos$corr,
                            clusters=clusters,
                            title="TMM")
  p.corr.DESeq <- ggplot.corr(DANA.results$norm.models$TCGA.DESeq$pos$corr,
                              clusters=clusters,
                              title="DESeq")
  p.corr.PoissonSeq <- ggplot.corr(DANA.results$norm.models$TCGA.PoissonSeq$pos$corr,
                                   clusters=clusters,
                                   title="PoissonSeq")
  p.corr.TC <- ggplot.corr(DANA.results$norm.models$TCGA.TC$pos$corr,
                           clusters=clusters,
                           title="TC")
  p.corr.Med <- ggplot.corr(DANA.results$norm.models$TCGA.Med$pos$corr,
                            clusters=clusters,
                            title="Med")
  p.corr.RUVg <- ggplot.corr(DANA.results$norm.models$TCGA.RUVg$pos$corr,
                             clusters=clusters,
                             title="RUVg")
  p.corr.RUVs <- ggplot.corr(DANA.results$norm.models$TCGA.RUVs$pos$corr,
                             clusters=clusters,
                             title="RUVs")
  p.corr.RUVr <- ggplot.corr(DANA.results$norm.models$TCGA.RUVr$pos$corr,
                             clusters=clusters,
                             title="RUVr")
  p.corr.UQ <- ggplot.corr(DANA.results$norm.models$TCGA.UQ$pos$corr,
                           clusters=clusters,
                           title="UQ")
  p.corr.QN <- ggplot.corr(DANA.results$norm.models$TCGA.QN$pos$corr,
                           clusters=clusters,
                           title="QN")
  
  # Arrange plots
  p.corr.all <-
    plot_grid(p.corr.noNorm + theme(legend.position="none"),
              p.corr.TMM + theme(legend.position="none"),
              p.corr.DESeq + theme(legend.position="none"),
              p.corr.PoissonSeq + theme(legend.position="none"),
              p.corr.TC + theme(legend.position="none"),
              p.corr.Med + theme(legend.position="none"),
              p.corr.RUVg + theme(legend.position="none"),
              p.corr.RUVs + theme(legend.position="none"),
              p.corr.RUVr + theme(legend.position="none"),
              p.corr.UQ + theme(legend.position="none"),
              p.corr.QN + theme(legend.position="none"),
              get.legend(p.corr.noNorm + theme(legend.position = "bottom")),
              ncol=3)
  plot(p.corr.all)
  
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "BRCA_UCS_Corr_All.pdf"), p.corr.all, width=9, height=12, device="pdf")
  }
}

Correlation Frequency Plots

Subset for paper

if(settings$generate.paper.figs) {
  # Co-expression models
  model <- DANA.results$data.model
  norm.model1 <- DANA.results$norm.models$TCGA.DESeq
  norm.model2 <- DANA.results$norm.models$TCGA.RUVs
  
  # Set number of genes for negative correlation to minimum
  num.genes.plot.neg <- min(dim(model$neg$corr)[1],
                            dim(norm.model1$neg$corr)[1],
                            dim(norm.model2$neg$corr)[1])
  
  # Subsetted correlation matrices
  corr.neg <- model$neg$corr[1:num.genes.plot.neg, 1:num.genes.plot.neg]
  corr.norm1.neg <- norm.model1$neg$corr[1:num.genes.plot.neg, 1:num.genes.plot.neg]
  corr.norm2.neg <- norm.model2$neg$corr[1:num.genes.plot.neg, 1:num.genes.plot.neg]
  
  ## extract non-zero correlations
  nnz.corr.neg <- corr.neg[upper.tri(corr.neg)]
  nnz.corr.neg <- nnz.corr.neg[nnz.corr.neg != 0]
  nnz.corr.norm1.neg <- corr.norm1.neg[upper.tri(corr.norm1.neg)]
  nnz.corr.norm1.neg <- nnz.corr.norm1.neg[nnz.corr.norm1.neg != 0]
  nnz.corr.norm2.neg <- corr.norm2.neg[upper.tri(corr.norm2.neg)]
  nnz.corr.norm2.neg <- nnz.corr.norm2.neg[nnz.corr.norm2.neg != 0]
  
  # Keep positive correlations
  nnz.corr.neg <- nnz.corr.neg[nnz.corr.neg > 0]
  nnz.corr.norm1.neg <- nnz.corr.norm1.neg[nnz.corr.norm1.neg > 0]
  nnz.corr.norm2.neg <- nnz.corr.norm2.neg[nnz.corr.norm2.neg > 0]

  
  ## direct comparison of negative controls un-normalized vs. DESeq vs. RUVs
  neg.corr <- data.frame(
    control=factor(c(rep("Un-normalized", length(nnz.corr.neg)),
                     rep("DESeq", length(nnz.corr.norm1.neg)),
                     rep("RUVs", length(nnz.corr.norm2.neg)))),
    corr=abs(c(nnz.corr.neg, nnz.corr.norm1.neg, nnz.corr.norm2.neg))
    )
  # ttt <- data.frame(prop.table(table(method=neg.corr$control, corr=neg.corr$corr), 1))
  
  p.density.neg.nnz.freq <- ggplot(neg.corr, aes(x=corr, colour=control, linetype=control)) +
    geom_freqpoly(binwidth=.05, alpha=.8, size=.9) +
    theme_classic() +
    xlim(0, 1) +
    # scale_linetype_manual(values=c("twodash", "solid", "solid", "solid"))+
    scale_color_manual(values=c('#e7298a','#1B9E77','#D95F02')) + # First colors from rcolorbrewer set "Dark2"
    theme(legend.position=c(0.9,0.86), 
          legend.title=element_blank(),
          legend.background=element_rect(fill=alpha('white', 0.5))) +
    ylab("Frequency") +
    xlab("Marginal correlation")#+ ggtitle("Positive correlation among negative controls")
  print(p.density.neg.nnz.freq)
  
  
  
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "BRCA_UCS_CorrDensityNeg_Freq.pdf"), p.density.neg.nnz.freq, width = 5, height=4, device="pdf")
  }
}
## Warning: Removed 6 row(s) containing missing values (geom_path).

All methods

if(settings$generate.paper.figs) {
  # Co-expression models
  model <- DANA.results$data.model
  
  # Set number of genes for negative correlation to minimum
  num.genes.plot.neg <- min(dim(model$neg$corr)[1],
                            sapply(DANA.results$norm.models, function(x) dim(x$neg$corr)[1]))
  
  # Subsetted correlation matrices
  corr.neg <- model$neg$corr[1:num.genes.plot.neg, 1:num.genes.plot.neg]
  corr.norm.neg <- lapply(DANA.results$norm.models, function(x) x$neg$corr[1:num.genes.plot.neg, 1:num.genes.plot.neg])
  
  ## Strictly positive correlations
  nnz.corr.neg <- corr.neg[upper.tri(corr.neg)]
  nnz.corr.neg <- nnz.corr.neg[nnz.corr.neg > 0]
  nnz.corr.norm.neg <- lapply(corr.norm.neg, function(x) x[upper.tri(x)][x[upper.tri(x)] > 0])
  names(nnz.corr.norm.neg) <- substr(names(nnz.corr.norm.neg), 6, 20)
  
  control <- c(rep("un-normalized", length(nnz.corr.neg)))
  for(i in 1:length(nnz.corr.norm.neg)) {
    control <- c(control, rep(names(nnz.corr.norm.neg)[i], length(nnz.corr.norm.neg[[i]])))
  }
  ## direct comparison of negative controls
  neg.corr <- data.frame(
    control=factor(control),
    corr=abs(c(nnz.corr.neg, unname(unlist(nnz.corr.norm.neg))))
    )
  
  p.corr.frequency.neg.controls.all <- ggplot(neg.corr, aes(x=corr, colour=control, linetype=control)) +
    geom_freqpoly(binwidth=.05, alpha=.9, size=.9) +
    theme_classic() +
    xlim(0, 1) +
    scale_color_manual(values=brewer.pal(n=12, name="Paired")) + # First 2 colors from rcolorbrewer set "Dark2"
    theme(legend.position=c(0.85,0.6), 
          legend.title=element_blank(),
          legend.background=element_rect(fill=alpha('white', 0.5))) +
    ylab("Frequency") +
    xlab("Marginal correlation") + # ggtitle("Positive correlation among negative controls") + 
    theme(legend.key.width = unit(2,"cm"))
  print(p.corr.frequency.neg.controls.all)
  
  
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "BRCA_UCS_CorrDensityNeg_Freq_All.pdf"), p.corr.frequency.neg.controls.all, width = 8, height=5, device="pdf")
  }
}
## Warning: Removed 22 row(s) containing missing values (geom_path).

Correlation Scatter Plots

Subset for paper

if(settings$generate.paper.figs) {
  ## Generate correlation scatter plots for positive controls for TMM and UQ
  par(mfrow=c(1,1))
  
  # un-normalized vs RUVr
  p.scatter.RUVr <- plot.corr.scatter(
    corr1=DANA.results$data.model$pos$corr, 
    corr2=DANA.results$norm.models$TCGA.RUVr$pos$corr,
    clusters=clusters.data,
    title="RUVr Normalization",
    xlab="un-normalized", 
    ylab="RUVr", 
    threshold=0)
  print(p.scatter.RUVr)
  # un-normalized vs Med
  p.scatter.Med <- plot.corr.scatter(
    corr1=DANA.results$data.model$pos$corr, 
    corr2=DANA.results$norm.models$TCGA.Med$pos$corr,
    clusters=clusters.data,
    title="Med Normalization",
    xlab="un-normalized", 
    ylab="Med", 
    threshold=0)
  print(p.scatter.Med)
  
  # Save plots
  if(settings$export.figures) {
    # ggsave(paste0(settings$paper.fig.path, "BRCA_UCS_ScatterRUVr.pdf"), p.scatter.RUVr, width=4, height=4, device="pdf")
    # ggsave(paste0(settings$paper.fig.path, "BRCA_UCS_ScatterMed.pdf"), p.scatter.Med, width=4, height=4, device="pdf")
  }
}

All normalization methods for supplementary materials

if(settings$generate.paper.figs) {
  par(mfrow=c(1,1))
  
  # un-normalized vs TMM
  p.scatter.TMM <- plot.corr.scatter(
    corr1=DANA.results$data.model$pos$corr, 
    corr2=DANA.results$norm.models$TCGA.TMM$pos$corr,
    clusters=clusters, title="TMM", xlab="un-normalized", ylab="TMM", 
    ccc=round(DANA.results$metrics["TCGA.TMM", "cc"],3))
  # un-normalized vs DESeq
  p.scatter.DESeq <- plot.corr.scatter(
    corr1=DANA.results$data.model$pos$corr, 
    corr2=DANA.results$norm.models$TCGA.DESeq$pos$corr,
    clusters=clusters, title="DESeq", xlab="un-normalized", ylab="DESeq", 
    ccc=round(DANA.results$metrics["TCGA.DESeq", "cc"],3))
  # un-normalized vs PoissonSeq
  p.scatter.PoissonSeq <- plot.corr.scatter(
    corr1=DANA.results$data.model$pos$corr, 
    corr2=DANA.results$norm.models$TCGA.PoissonSeq$pos$corr,
    clusters=clusters, title="PoissonSeq", xlab="un-normalized", ylab="PoissonSeq", 
    ccc=round(DANA.results$metrics["TCGA.PoissonSeq", "cc"],3))
  # un-normalized vs TC
  p.scatter.TC <- plot.corr.scatter(
    corr1=DANA.results$data.model$pos$corr, 
    corr2=DANA.results$norm.models$TCGA.TC$pos$corr,
    clusters=clusters, title="TC", xlab="un-normalized", ylab="TC", 
    ccc=round(DANA.results$metrics["TCGA.TC", "cc"],3))
  # un-normalized vs Med
  p.scatter.Med <- plot.corr.scatter(
    corr1=DANA.results$data.model$pos$corr, 
    corr2=DANA.results$norm.models$TCGA.Med$pos$corr,
    clusters=clusters, title="Med", xlab="un-normalized", ylab="Med", 
    ccc=round(DANA.results$metrics["TCGA.Med", "cc"],3))
  # un-normalized vs RUVg
  p.scatter.RUVg <- plot.corr.scatter(
    corr1=DANA.results$data.model$pos$corr, 
    corr2=DANA.results$norm.models$TCGA.RUVg$pos$corr,
    clusters=clusters, title="RUVg", xlab="un-normalized", ylab="RUVg", 
    ccc=round(DANA.results$metrics["TCGA.RUVg", "cc"],3))
  # un-normalized vs RUVs
  p.scatter.RUVs <- plot.corr.scatter(
    corr1=DANA.results$data.model$pos$corr, 
    corr2=DANA.results$norm.models$TCGA.RUVs$pos$corr,
    clusters=clusters, title="RUVs", xlab="un-normalized", ylab="RUVs", 
    ccc=round(DANA.results$metrics["TCGA.RUVs", "cc"],3))
  # un-normalized vs RUVr
  p.scatter.RUVr <- plot.corr.scatter(
    corr1=DANA.results$data.model$pos$corr, 
    corr2=DANA.results$norm.models$TCGA.RUVr$pos$corr,
    clusters=clusters, title="RUVr", xlab="un-normalized", ylab="RUVr", 
    ccc=round(DANA.results$metrics["TCGA.RUVr", "cc"],3))
  # un-normalized vs UQ
  p.scatter.UQ <- plot.corr.scatter(
    corr1=DANA.results$data.model$pos$corr, 
    corr2=DANA.results$norm.models$TCGA.UQ$pos$corr,
    clusters=clusters, title="UQ", xlab="un-normalized", ylab="UQ", 
    ccc=round(DANA.results$metrics["TCGA.UQ", "cc"],3))
  # un-normalized vs QN
  p.scatter.QN <- plot.corr.scatter(
    corr1=DANA.results$data.model$pos$corr, 
    corr2=DANA.results$norm.models$TCGA.QN$pos$corr,
    clusters=clusters, title="QN", xlab="un-normalized", ylab="QN", 
    ccc=round(DANA.results$metrics["TCGA.QN", "cc"],3))
  
  p.scatter.TCGA.all <- plot_grid(p.scatter.TMM,
                                  p.scatter.DESeq,
                                  p.scatter.PoissonSeq,
                                  p.scatter.TC,
                                  p.scatter.Med,
                                  p.scatter.RUVg,
                                  p.scatter.RUVs,
                                  p.scatter.RUVr,
                                  p.scatter.UQ,
                                  p.scatter.QN,
                                  ncol = 3,
                                  align="hv")
  plot(p.scatter.TCGA.all)
  
  
  # Save plots
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "BRCA_UCS_Scatter_All.pdf"), p.scatter.TCGA.all, width=9, height=12, device="pdf")
  }
}
## Warning in plot.corr.scatter(corr1 = DANA.results$data.model$pos$corr, corr2 = DANA.results$norm.models$TCGA.RUVs$pos$corr, : Warning: Shapes of correlation matrices in plot.corr.scatter do not match! 
##             Reducing correlation matrices to all mutual genes.

DANA Result Metrics

if(settings$generate.paper.figs) { 
  options(scipen=4) # force non-scientific notation of x axis
  
  test.DANA.metrics <- data.frame(
    method=substr(rownames(DANA.results$metrics), 6, 20),
    cc=DANA.results$metrics[, "cc"],
    mcr=DANA.results$metrics[, "mcr"]
  )
  
  p.DANA.metrics <- ggplot(test.DANA.metrics,aes(x=mcr,y=cc, label=method)) + 
    geom_point(alpha=.5) +
    theme_classic() + 
    xlab(TeX("$mcr^-$; Relative reduction of handling effects")) + 
    ylab(TeX("$cc^+$; Biological signal preservation")) + 
    geom_text_repel(aes(label = method), size=3, max.overlaps = Inf, min.segment.length = 0.2) +
    # xlim(c(min(c(0, test.mposc.reduction)),max(test.mposc.reduction))) +
    scale_x_continuous(labels = scales::percent, limits=c(0,1.05), 
                       breaks = scales::pretty_breaks(n = 5)) +
    # ylim(c(0,1)) 
    scale_y_continuous(labels = scales::percent, limits = c(0,1))
  print(p.DANA.metrics)
  
  print("DANA Statistics for the TCGA-BRCA/UCS")
  test.DANA.metrics$cc <- round(test.DANA.metrics$cc,3)
  test.DANA.metrics$mcr <- round(test.DANA.metrics$mcr,3)
  print(test.DANA.metrics)
  
  
  
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "BRCA_UCS_DANAMetrics.pdf"), p.DANA.metrics, width=4.5, height=3.5, device="pdf")
  }
  
}

## [1] "DANA Statistics for the TCGA-BRCA/UCS"
##        method    cc   mcr
## 1         TMM 0.990 0.715
## 2       DESeq 0.990 0.733
## 3  PoissonSeq 0.988 0.717
## 4          TC 0.993 0.534
## 5         Med 0.924 0.363
## 6        RUVg 0.985 0.717
## 7        RUVs 0.829 0.812
## 8        RUVr 0.976 0.777
## 9          UQ 0.937 0.398
## 10         QN 0.972 0.783

Package Information

sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=German_Germany.1252  LC_CTYPE=German_Germany.1252   
## [3] LC_MONETARY=German_Germany.1252 LC_NUMERIC=C                   
## [5] LC_TIME=German_Germany.1252    
## 
## attached base packages:
##  [1] stats4    splines   parallel  stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] ggcorrplot_0.1.3            latex2exp_0.4.0            
##  [3] RColorBrewer_1.1-2          cowplot_1.1.0              
##  [5] openxlsx_4.2.2              ffpe_1.32.0                
##  [7] TTR_0.24.2                  DescTools_0.99.38          
##  [9] vsn_3.56.0                  RUVSeq_1.22.0              
## [11] EDASeq_2.22.0               ShortRead_1.46.0           
## [13] GenomicAlignments_1.24.0    SummarizedExperiment_1.18.2
## [15] DelayedArray_0.14.1         matrixStats_0.56.0         
## [17] Rsamtools_2.4.0             GenomicRanges_1.40.0       
## [19] GenomeInfoDb_1.24.2         Biostrings_2.56.0          
## [21] XVector_0.28.0              IRanges_2.22.2             
## [23] S4Vectors_0.26.1            sva_3.36.0                 
## [25] BiocParallel_1.22.0         genefilter_1.70.0          
## [27] mgcv_1.8-33                 nlme_3.1-149               
## [29] PoissonSeq_1.1.2            combinat_0.0-8             
## [31] DESeq_1.39.0                lattice_0.20-41            
## [33] locfit_1.5-9.4              Biobase_2.48.0             
## [35] BiocGenerics_0.34.0         edgeR_3.30.3               
## [37] limma_3.44.3                FastGGM_1.0                
## [39] Rcpp_1.0.5                  huge_1.3.4.1               
## [41] glmnet_4.1                  Matrix_1.2-18              
## [43] ggrepel_0.9.1               plotly_4.9.3               
## [45] stargazer_5.2.2             corrplot_0.84              
## [47] ggnewscale_0.4.5            gridExtra_2.3              
## [49] ggplot2_3.3.3              
## 
## loaded via a namespace (and not attached):
##   [1] R.utils_2.10.1            tidyselect_1.1.0         
##   [3] RSQLite_2.2.0             AnnotationDbi_1.50.3     
##   [5] htmlwidgets_1.5.3         grid_4.0.2               
##   [7] munsell_0.5.0             codetools_0.2-16         
##   [9] preprocessCore_1.50.0     nleqslv_3.3.2            
##  [11] withr_2.3.0               colorspace_1.4-1         
##  [13] knitr_1.30                rstudioapi_0.11          
##  [15] labeling_0.4.2            GenomeInfoDbData_1.2.3   
##  [17] hwriter_1.3.2             farver_2.0.3             
##  [19] bit64_4.0.5               rhdf5_2.32.4             
##  [21] vctrs_0.3.4               generics_0.0.2           
##  [23] xfun_0.17                 BiocFileCache_1.12.1     
##  [25] R6_2.4.1                  illuminaio_0.30.0        
##  [27] bitops_1.0-6              reshape_0.8.8            
##  [29] assertthat_0.2.1          scales_1.1.1             
##  [31] rootSolve_1.8.2.1         gtable_0.3.0             
##  [33] affy_1.66.0               methylumi_2.34.0         
##  [35] lmom_2.8                  rlang_0.4.7              
##  [37] rtracklayer_1.48.0        lazyeval_0.2.2           
##  [39] GEOquery_2.56.0           BiocManager_1.30.10      
##  [41] yaml_2.2.1                crosstalk_1.1.0.1        
##  [43] GenomicFeatures_1.40.1    tools_4.0.2              
##  [45] nor1mix_1.3-0             affyio_1.58.0            
##  [47] ellipsis_0.3.1            lumi_2.40.0              
##  [49] siggenes_1.62.0           plyr_1.8.6               
##  [51] progress_1.2.2            zlibbioc_1.34.0          
##  [53] purrr_0.3.4               RCurl_1.98-1.2           
##  [55] prettyunits_1.1.1         openssl_1.4.3            
##  [57] bumphunter_1.30.0         zoo_1.8-8                
##  [59] sfsmisc_1.1-7             magrittr_1.5             
##  [61] data.table_1.13.2         mvtnorm_1.1-1            
##  [63] aroma.light_3.18.0        hms_0.5.3                
##  [65] evaluate_0.14             xtable_1.8-4             
##  [67] XML_3.99-0.5              jpeg_0.1-8.1             
##  [69] mclust_5.4.6              shape_1.4.5              
##  [71] compiler_4.0.2            biomaRt_2.44.4           
##  [73] minfi_1.34.0              tibble_3.0.3             
##  [75] KernSmooth_2.23-17        crayon_1.3.4             
##  [77] R.oo_1.24.0               htmltools_0.5.0          
##  [79] tidyr_1.1.2               geneplotter_1.66.0       
##  [81] expm_0.999-5              Exact_2.1                
##  [83] RcppParallel_5.0.2        DBI_1.1.0                
##  [85] dbplyr_1.4.4              MASS_7.3-53              
##  [87] rappdirs_0.3.1            boot_1.3-25              
##  [89] readr_1.4.0               quadprog_1.5-8           
##  [91] R.methodsS3_1.8.1         igraph_1.2.6             
##  [93] pkgconfig_2.0.3           xml2_1.3.2               
##  [95] foreach_1.5.1             annotate_1.66.0          
##  [97] rngtools_1.5              multtest_2.44.0          
##  [99] beanplot_1.2              doRNG_1.8.2              
## [101] scrime_1.3.5              stringr_1.4.0            
## [103] digest_0.6.25             rmarkdown_2.4            
## [105] base64_2.0                gld_2.6.2                
## [107] DelayedMatrixStats_1.10.1 curl_4.3                 
## [109] lifecycle_0.2.0           jsonlite_1.7.1           
## [111] Rhdf5lib_1.10.1           viridisLite_0.3.0        
## [113] askpass_1.1               pillar_1.4.6             
## [115] httr_1.4.2                survival_3.2-3           
## [117] glue_1.4.2                xts_0.12.1               
## [119] zip_2.1.1                 png_0.1-7                
## [121] iterators_1.0.13          bit_4.0.4                
## [123] class_7.3-17              stringi_1.5.3            
## [125] HDF5Array_1.16.1          blob_1.2.1               
## [127] latticeExtra_0.6-29       memoise_1.1.0            
## [129] dplyr_1.0.2               e1071_1.7-4